import pandas as pd


# Build the node tables (movie, director, screenwriter, actor, type, area,
# language) from ./newdata/data.csv and export each one as its own CSV file.
#
# Every table has three columns: 'index:ID' (a numeric ID taken from a
# per-kind range: 100001+, 200001+, ...), the node name, and ':LABEL'.
# NOTE(review): the headers look like the neo4j-admin import node format --
# confirm against the loader that consumes these files.


def _unique_names(column, sep=None):
    """Return the distinct non-missing values of *column* in first-seen order.

    Args:
        column: pandas Series with one cell per input row.
        sep: When given, each cell is split on this separator and every piece
            is collected as its own name. When ``None`` the cell is used
            unchanged.

    Returns:
        A list of distinct names, ordered by first appearance in *column*.
    """
    # A dict is used as an ordered set.  Unlike ``list(set(...))`` the result
    # does not depend on per-process string hash randomization, so the IDs
    # assigned from this order are the same on every run for the same input.
    seen = {}
    # Missing cells (NaN) are skipped: they carry no name, and calling
    # ``.split`` on a float NaN would raise AttributeError.
    for cell in column.dropna():
        pieces = cell.split(sep) if sep is not None else (cell,)
        seen.update(dict.fromkeys(pieces))
    return list(seen)


def _node_table(names, first_id, name_column, label):
    """Build one node table with consecutive IDs starting at *first_id*.

    Args:
        names: Distinct node names; row order decides the ID assignment.
        first_id: ID given to the first name; following names count up by one.
        name_column: Header of the name column in the exported CSV.
        label: Value written into the ':LABEL' column of every row.

    Returns:
        DataFrame with columns ``['index:ID', name_column, ':LABEL']``.
    """
    return pd.DataFrame({
        'index:ID': list(range(first_id, first_id + len(names))),
        name_column: names,
        ':LABEL': label,
    })


df = pd.read_csv('./newdata/data.csv', encoding='utf-8')

# Multi-valued columns hold several names joined by '/'.
# NOTE(review): 'director' and 'area' are treated as single values here,
# exactly as before -- confirm that these columns never contain '/'-joined
# lists.
movieList = _unique_names(df['m_name'])
directorList = _unique_names(df['director'])
screenwriterList = _unique_names(df['screenwriter'], sep='/')
actorList = _unique_names(df['actor'], sep='/')
typeList = _unique_names(df['type'], sep='/')
areaList = _unique_names(df['area'])
languageList = _unique_names(df['language'], sep='/')

movie = _node_table(movieList, 100001, 'movie', 'movie')
director = _node_table(directorList, 200001, 'director', 'director')
screenwriter = _node_table(screenwriterList, 300001, 'screenwriter', 'screenwriter')
actor = _node_table(actorList, 400001, 'actor', 'actor')
# Named ``type_`` (not ``type``) so the builtin ``type`` is not shadowed.
type_ = _node_table(typeList, 500001, 'type', 'type')
area = _node_table(areaList, 600001, 'area', 'area')
language = _node_table(languageList, 700001, 'language', 'language')

movie.to_csv('./newdata/movie.csv', index=False, encoding='utf-8_sig')
print('电影结点导出到csv成功')

director.to_csv('./newdata/director.csv', index=False, encoding='utf-8_sig')
print('导演结点导出到csv成功')

screenwriter.to_csv('./newdata/screenwriter.csv', index=False, encoding='utf-8_sig')
print('编剧结点导出到csv成功')

actor.to_csv('./newdata/actor.csv', index=False, encoding='utf-8_sig')
print('演员结点导出到csv成功')

type_.to_csv('./newdata/type.csv', index=False, encoding='utf-8_sig')
print('类型结点导出到csv成功')

area.to_csv('./newdata/area.csv', index=False, encoding='utf-8_sig')
print('地区结点导出到csv成功')

language.to_csv('./newdata/language.csv', index=False, encoding='utf-8_sig')
print('语言结点导出到csv成功')
